{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from cforest.forest import CausalForest"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# %load simulate.py\n",
    "\"\"\"\n",
    "This module provides functions for the simulation of data which is used in the\n",
    "monte carlo simulation.\n",
    "\"\"\"\n",
    "import numpy as np\n",
    "\n",
    "\n",
    "def simulate(\n",
    "    nobs,\n",
    "    nfeatures,\n",
    "    coefficients,\n",
    "    error_var,\n",
    "    seed,\n",
    "    propensity_score=None,\n",
    "    alpha=0.8,\n",
    "):\n",
    "    \"\"\"Simulate data with heterogenous treatment effects.\n",
    "\n",
    "    Simulate outcomes y (length *nobs*), features X (*nobs* x *nfeatures*) and\n",
    "    treatment status treatment_status (length *nobs*) using the potential\n",
    "    outcome framework from Neyman and Rubin.\n",
    "        We simulate the data by imposing a linear model with two relevant\n",
    "    features plus a treatment effect. However, we return a design matrix with\n",
    "    *nfeatures* from which only the two used in the simulation are relevant.\n",
    "        The model looks the following:\n",
    "    y(0)_i = coef[0] + X_1i coef[1] +  + X_2i coef[2] + error_i,\n",
    "    y(1)_i = coef[0] + X_1i coef[1] +  + X_2i coef[2]  + treatment_i + error_i,\n",
    "    where coef[0], coef[1] and coef[2] denote the intercept and slopes\n",
    "    respectively, and treatment_i = treatment(X_i) denotes the heterogenous\n",
    "    treatment effect, which is solely dependent on the location of the\n",
    "    individual in the feature space of the first two dimensions, i.e. as with\n",
    "    the linear model it only depends on X_1i and X_2i; at last y(0)_i, y(1)_i\n",
    "    denote the potential outcomes of individual i.\n",
    "\n",
    "    Args:\n",
    "        nobs (int): positive integer denoting the number of observations.\n",
    "        nfeatures (int): positiv integer denoting the number of features. Must\n",
    "            be greater than or equal to 2.\n",
    "        coefficients: Coefficients for the linear model, first value denotes\n",
    "            the intercept, second and third the slope for the second and third\n",
    "            feature. Must be convertable to a np.ndarray.\n",
    "        error_var (float): positive float denoting the error variance.\n",
    "        seed (int): seed for the random number generator.\n",
    "        propensity_score (np.array): array containing propensity scores, must\n",
    "            be of length *nobs*. If None, will be set to 0.5 for each\n",
    "            individual.\n",
    "        alpha (float): positive parameter influencing the shape of the\n",
    "            function. Default is 0.8.\n",
    "\n",
    "    Returns:\n",
    "        X (np.array): [nobs x nfeatures] numpy array with simulated features.\n",
    "        t (np.array): [nobs] boolean numpy array containing treatment\n",
    "            status of individuals.\n",
    "        y (np.array): [nobs] numpy array containing \"observed\" outcomes.\n",
    "\n",
    "    Raises:\n",
    "        ValueError, if dimensions mismatch or data type of inputs is incorrect.\n",
    "    \"\"\"\n",
    "    np.random.seed(seed)\n",
    "\n",
    "    # Assert input values\n",
    "    if not float.is_integer(float(nobs)):\n",
    "        raise ValueError(\"Argument *nobs* is not an integer.\")\n",
    "    if not float.is_integer(float(nfeatures)):\n",
    "        raise ValueError(\"Argument *nfeatures* is not an integer.\")\n",
    "\n",
    "    coefficients = np.array(coefficients)\n",
    "    if nfeatures < 2:\n",
    "        raise ValueError(\n",
    "            \"Argument *nfeatures* must be greater or equal than\" \"two\"\n",
    "        )\n",
    "\n",
    "    if len(coefficients) != 3:\n",
    "        raise ValueError(\"Argument *coefficients* needs to be of length 3.\")\n",
    "\n",
    "    # Simulate treatment status\n",
    "    if propensity_score is None:\n",
    "        treatment_status = np.random.binomial(1, 0.5, nobs)\n",
    "    else:\n",
    "        if len(propensity_score) != nobs:\n",
    "            raise ValueError(\n",
    "                \"Dimensions of argument *propensity_score* do not\"\n",
    "                \"match with *nobs*.\"\n",
    "            )\n",
    "        treatment_status = np.random.binomial(1, propensity_score, nobs)\n",
    "\n",
    "    error = np.random.normal(0, np.sqrt(error_var), nobs)\n",
    "    X = np.random.uniform(-15, 15, (nobs, nfeatures))\n",
    "    y = _compute_outcome(X, coefficients, treatment_status, error, alpha)\n",
    "    t = np.array(treatment_status, dtype=bool)\n",
    "\n",
    "    return X, t, y\n",
    "\n",
    "\n",
    "def _compute_outcome(X, coefficients, treatment_status, error, alpha):\n",
    "    \"\"\"Compute observed potential outcome.\n",
    "\n",
    "    Simulates potential outcomes and returns outcome corresponding to the\n",
    "    treatment status.\n",
    "\n",
    "    Args:\n",
    "        X (np.array): design array containing features.\n",
    "        coefficients (np.array): coefficient array containing intercept and\n",
    "            two slope parameter.\n",
    "        treatment_status (np.array): array containing the treatment status\n",
    "        error (np.array): array containing the error terms for the linear model\n",
    "        alpha (float): positive parameter influencing the shape of the function\n",
    "\n",
    "    Returns:\n",
    "        y (np.array): the observed potential outcomes.\n",
    "\n",
    "    >>> import numpy as np\n",
    "    >>> X = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])\n",
    "    >>> coefficients = np.array([0, 1, -1])\n",
    "    >>> treatment_status = np.array([True, False, True])\n",
    "    >>> error = np.array([-0.1, 0, 0.1])\n",
    "    >>> alpha = 0.2\n",
    "    >>> _compute_outcome(X, coefficients, treatment_status, error, alpha)\n",
    "    array([-0.78932461, -1.0, -0.24908566])\n",
    "    \"\"\"\n",
    "    #baseline_model = coefficients[0] + np.dot(X[:, :2], coefficients[1:])\n",
    "    baseline_model = 0\n",
    "    y0 = baseline_model + error\n",
    "\n",
    "    treat_effect = true_treatment_effect(X[:, 0], X[:, 1], alpha)\n",
    "    y1 = y0 + treat_effect\n",
    "\n",
    "    y = (1 - treatment_status) * y0 + treatment_status * y1\n",
    "    return y\n",
    "\n",
    "\n",
    "def true_treatment_effect(x, y, alpha=0.8, scale=5):\n",
    "    \"\"\"Compute individual treatment effects.\n",
    "\n",
    "    Computes individual treatment effect conditional on features *X* using\n",
    "    parameter *alpha* to determine the smoothness of the conditional\n",
    "    treatment function and *scale* to determine the scaling.\n",
    "\n",
    "    Args:\n",
    "        X (np.array): array with n rows and 2 columns depicting two features\n",
    "            for n individuals.\n",
    "        alpha (float): positive parameter influencing the shape of the\n",
    "            function. Defaults to 0.8.\n",
    "        scale (float): positive parameter determining the scaling of the\n",
    "            function. Defaults to 5. With scale=x the range of the function is\n",
    "            [0, x].\n",
    "\n",
    "    Returns:\n",
    "        result (np.array): array of length n containing individual treatment\n",
    "            effects.\n",
    "\n",
    "    >>> import numpy as np\n",
    "    >>> X = np.array([[0.5, 0.75], [0.25, 1]])\n",
    "    >>> alpha = 0.2\n",
    "    >>> _true_treatment_effect(X, alpha)\n",
    "    array([0.26475042, 0.26442005])\n",
    "    \"\"\"\n",
    "    denominatorx = 1 + np.exp(-alpha * (x - 1 / 3))\n",
    "    fractionx = 1 / denominatorx\n",
    "\n",
    "    denominatory = 1 + np.exp(-alpha * (y - 1 / 3))\n",
    "    fractiony = 1 / denominatory\n",
    "\n",
    "    result = scale * fractionx * fractiony\n",
    "    return result\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.model_selection import train_test_split"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "simparams = {\n",
    "    'nobs': 10000,\n",
    "    'nfeatures': 2,\n",
    "    'coefficients': [0.5, 0, 0],\n",
    "    'error_var': 0.1,\n",
    "    'seed': 1,\n",
    "    'alpha': 0.975\n",
    "}\n",
    "\n",
    "\n",
    "X, t, y = simulate(**simparams)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "cf = CausalForest(\n",
    "    num_trees=50,\n",
    "    split_ratio=0.5,\n",
    "    min_leaf=5,\n",
    "    max_depth=20,\n",
    "    #use_transformed_outcomes=True,\n",
    "    num_workers=4,\n",
    "    seed_counter=1,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<cforest.forest.CausalForest at 0x7f15e59b0690>"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cf.fit(X, t, y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "XX = np.array([\n",
    "    [0.5, 0.75], \n",
    "    [0.25, 1],\n",
    "    [0.9, 0.9],\n",
    "    [0.1, 0.1]\n",
    "])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0.60700811, 0.92394599, 1.71216153, 0.76622235])"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cf.predict(XX)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "causal-forest",
   "language": "python",
   "name": "causal-forest"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
